from bs4 import BeautifulSoup
from basecrawler import BaseCrawler


class NieHanNew:
    """Crawler for the ``neihanmanhua`` comic section of www.kx1d.com.

    Page fetching/parsing is delegated to :class:`BaseCrawler`; this class
    only supplies the element-selection and item-building callbacks for the
    paginated list pages and the per-comic detail pages.
    """

    def __init__(self, targetUrl):
        # Base URL of the paginated comic list,
        # e.g. "http://www.kx1d.com/neihanmanhua/".
        self.__targetUrl = targetUrl

    def __contentList(self, page):
        """Return the <li> entries of the first list container (class "liL").

        :param page: raw HTML of a list page.
        """
        soup = BeautifulSoup(page, 'lxml')
        # find() is equivalent to find_all(...)[0] here; both fail loudly
        # if the container is missing (AttributeError vs IndexError).
        return soup.find(class_="liL").find_all("li")

    def __builtContentObj(self, element):
        """Build a ``{"title", "link"}`` dict from a list-page <li> anchor."""
        return {
            "title": element.a["title"],
            "link": element.a["href"],
        }

    def __detailPage(self, page):
        """Return the article body element of a detail page (None if absent)."""
        soup = BeautifulSoup(page, 'lxml')
        return soup.find(class_="articleBody")

    def __builtDetailPageObj(self, element):
        """Extract the comic image URL from an article body element.

        Returns ``{"link": ""}`` when no usable <img src> is present.
        """
        items = {"link": ""}
        # NOTE: the former hasattr(element, "img") check was a no-op —
        # bs4 Tag attribute access never raises, it returns None when the
        # child tag is absent — so a plain None check is what matters.
        if element is not None and element.img is not None:
            # .get avoids a KeyError when the <img> has no src attribute;
            # the "" default matches the pre-initialized value above.
            items["link"] = element.img.get("src", "")
        return items

    def download(self):
        """Walk list pages 1..14, fetch every comic's detail page and print
        the collected title/image links to stdout."""
        parent = BaseCrawler(self.__targetUrl)
        index = 0
        for i in range(1, 15):
            # Page 1 is the section index itself; subsequent pages live at
            # /index_<i>.html.  (Fixes the "targeUrl" typo and replaces the
            # obscure (a, b)[cond] tuple-index with a conditional expression.)
            targetUrl = (self.__targetUrl if i == 1
                         else f"{self.__targetUrl}/index_{i}.html")
            for item in parent.parserPage(targetUrl, elementsFunc=self.__contentList,
                                          itemFunc=self.__builtContentObj):
                url = "http://www.kx1d.com" + item['link']
                for detail in parent.parserPage(url, elementsFunc=self.__detailPage,
                                                itemFunc=self.__builtDetailPageObj):
                    index += 1
                    print(f"url:{url}")
                    # The original output ran "idx:<n>" straight into
                    # "title:" with no separator — add the missing ", ".
                    print(f"page:{i}, idx:{index}, title:{item['title']}, "
                          f"link:{detail['link']}")


if __name__ == "__main__":
    # Crawl the comic-list section and dump every title/image link found.
    crawler = NieHanNew("http://www.kx1d.com/neihanmanhua/")
    crawler.download()
